import pandas as pd
import matplotlib.pyplot as plt


colors = pd.read_csv("data/colors.csv")
colors.head()


# Number of Unique Entries in rgb (Colour)
colors.rgb.nunique()

124


# Number of Unique Entries in name (Colour Name)
colors["name"].nunique()

135


colors.groupby("is_trans").count()


# .value_counts() Function
colors["is_trans"].value_counts()

f    107
t     28
Name: is_trans, dtype: int64


sets = pd.read_csv("data/sets.csv")
sets.head()


sets["year"].min()

1949


sets[sets["year"] == 1949]


sets.sort_values("num_parts", ascending=False)


sets_by_year = sets.groupby("year").count()
sets_by_year


sets_by_year["set_num"].head()

year
1949     5
1950     6
1953     4
1954    14
1955    28
Name: set_num, dtype: int64


plt.plot(sets_by_year.index, sets_by_year.set_num)
# This is misleading because the last two entries in the column (2021, 2020) are estimates

[<matplotlib.lines.Line2D at 0x7fb49034ea90>]


sets_by_year.tail()


plt.plot(sets_by_year.index[:-2], sets_by_year.set_num[:-2])

[<matplotlib.lines.Line2D at 0x7fb4a107a5b0>]


theme_by_year = sets.groupby("year").agg({"theme_id": pd.Series.nunique})
theme_by_year.head()


theme_by_year.rename(columns={"theme_id": "nr_themes"}, inplace=True)
theme_by_year.head()


plt.plot(theme_by_year.index[:-2], theme_by_year.nr_themes[:-2])

[<matplotlib.lines.Line2D at 0x7fb4901cfb50>]


# Number of themes & Number of Sets in same plot with same axes
plt.plot(sets_by_year.index[:-2], sets_by_year.set_num[:-2])
plt.plot(theme_by_year.index[:-2], theme_by_year.nr_themes[:-2])

[<matplotlib.lines.Line2D at 0x7fb4a110d190>]


# We use axis object in Matplotlib
ax1 = plt.gca()  # Get Current Axis
ax2 = plt.twinx()  # Create another axis which shares the same x-axis

ax1.plot(sets_by_year.index[:-2], sets_by_year.set_num[:-2], color="b")
ax2.plot(theme_by_year.index[:-2], theme_by_year.nr_themes[:-2], color="orange")

ax1.set_xlabel("Year")
ax1.set_ylabel("Number of Sets", color="blue")
ax2.set_ylabel("Number of Themes", color="orange")

Text(0, 0.5, 'Number of Themes')


parts_per_year = sets.groupby("year").agg({"num_parts": pd.Series.mean})
parts_per_year.head()


parts_per_year.tail()


plt.scatter(parts_per_year.index[:-2], parts_per_year.num_parts[:-2])

<matplotlib.collections.PathCollection at 0x7fb4c254f880>


set_theme_count = sets["theme_id"].value_counts()
set_theme_count[:5]

158    753
501    656
494    398
435    356
503    329
Name: theme_id, dtype: int64


themes = pd.read_csv("data/themes.csv")
themes.head()


themes[themes["name"] == "Star Wars"]


sets[sets.theme_id == 18]


sets[sets.theme_id == 209]


set_theme_df = pd.DataFrame({
    "id": set_theme_count.index,
    "set_count": set_theme_count.values,
})
set_theme_df.head()


merged_df = pd.merge(set_theme_df, themes, on="id")
merged_df.head()


plt.bar(merged_df.name[:10], merged_df.set_count[:10])
# X-Axis is clustered

<BarContainer object of 10 artists>


plt.figure(figsize=(14,8))
plt.xticks(fontsize=14, rotation=45)
plt.yticks(fontsize=14)
plt.ylabel('Nr of Sets', fontsize=14)
plt.xlabel('Theme Name', fontsize=14)
 
plt.bar(merged_df.name[:10], merged_df.set_count[:10])

<BarContainer object of 10 artists>


print("That's It!")

That's It!

	id	name	rgb	is_trans
0	-1	Unknown	0033B2	f
1	0	Black	05131D	f
2	1	Blue	0055BF	f
3	2	Green	237841	f
4	3	Dark Turquoise	008F9B	f

	set_num	name	year	theme_id	num_parts
0	001-1	Gears	1965	1	43
1	0011-2	Town Mini-Figures	1978	84	12
2	0011-3	Castle 2 for 1 Bonus Offer	1987	199	0
3	0012-1	Space Mini-Figures	1979	143	12
4	0013-1	Space Mini-Figures	1979	143	12

	set_num	name	year	theme_id	num_parts
9521	700.1-1	Extra-Large Gift Set (ABB)	1949	365	142
9534	700.2-1	Large Gift Set (ABB)	1949	365	178
9539	700.3-1	Medium Gift Set (ABB)	1949	365	142
9544	700.A-1	Small Brick Set (ABB)	1949	371	24
9545	700.B-1	Small Doors and Windows Set (ABB)	1949	371	12

	set_num	name	year	theme_id	num_parts
15004	BIGBOX-1	The Ultimate Battle for Chima	2015	571	9987
11183	75192-1	UCS Millennium Falcon	2017	171	7541
10551	71043-1	Hogwarts Castle	2018	246	6020
295	10256-1	Taj Mahal	2017	673	5923
221	10189-1	Taj Mahal	2008	673	5922
...	...	...	...	...	...
1782	20216-1	MBA Robot & Micro Designer (Kits 2 - 3 Redesign)	2013	432	0
1780	20214-1	MBA Adventure Designer (Kits 7 - 9 Redesign)	2013	432	0
6822	5005539-1	Brick Pouch (Yellow)	2018	501	0
9026	66319-1	Power Miners 3 in 1 Superpack	2009	439	0
12946	853471-1	C-3PO Key Chain	2015	503	0

	set_num	name	theme_id	num_parts
year
1949	5	5	5	5
1950	6	6	6	6
1953	4	4	4	4
1954	14	14	14	14
1955	28	28	28	28
...	...	...	...	...
2017	786	786	786	786
2018	816	816	816	816
2019	840	840	840	840
2020	674	674	674	674
2021	3	3	3	3

Lego Analysis: Introduction¶

Import Statements¶

Data Exploration¶

Understanding LEGO Themes vs. LEGO Sets¶

Aggregate Data with the Python .agg() Function¶

Line Charts with Two Seperate Axes¶

Scatter Plots in Matplotlib¶

Number of Sets per LEGO Theme¶

Database Schema¶

Database Schemas, Foreign Keys and Merging DataFrames¶

Merging (i.e., Combining) DataFrames based on a Key¶

	num_parts
year
1949	99.600000
1950	1.000000
1953	13.500000
1954	12.357143
1955	36.607143

	num_parts
year
2017	221.840967
2018	213.618873
2019	207.510714
2020	259.732938
2021	0.000000

	id	name	parent_id
0	1	Technic	NaN
1	2	Arctic Technic	1.0
2	3	Competition	1.0
3	4	Expert Builder	1.0
4	5	Model	1.0

	id	name	parent_id
17	18	Star Wars	1.0
150	158	Star Wars	NaN
174	209	Star Wars	207.0
211	261	Star Wars	258.0

	set_num	name	year	theme_id	num_parts
8786	65081-1	R2-D2 / C-3PO Droid Collectors Set	2002	18	1
12051	8000-1	Pit Droid	2000	18	223
12058	8001-1	Battle Droid	2000	18	336
12066	8002-1	Destroyer Droid	2000	18	567
12071	8007-1	C-3PO	2001	18	339
12072	8008-1	Stormtrooper	2001	18	360
12073	8009-1	R2-D2	2002	18	239
12074	8010-1	Darth Vader	2002	18	388
12081	8011-1	Jango Fett	2002	18	425
12083	8012-1	Super Battle Droid	2002	18	378
15273	K8008-1	Darth Vader / Stormtrooper Kit	2002	18	0

	set_num	name	year	theme_id	num_parts
11013	75023-1	Star Wars Advent Calendar 2013	2013	209	254
11046	75056-1	Star Wars Advent Calendar 2014	2014	209	273
11080	75097-1	Star Wars Advent Calendar 2015	2015	209	291
11131	75146-1	Star Wars Advent Calendar 2016	2016	209	282
11173	75184-1	Star Wars Advent Calendar 2017	2017	209	309
11206	75213-1	Star Wars Advent Calendar 2018	2018	209	307
11245	75245-1	Star Wars Advent Calendar 2019	2019	209	280
11281	75279-1	Star Wars Advent Calendar 2020	2020	209	312
12019	7958-1	Star Wars Advent Calendar 2011	2011	209	267
14352	9509-1	Star Wars Advent Calendar 2012	2012	209	235

	id	set_count	name	parent_id
0	158	753	Star Wars	NaN
1	501	656	Gear	NaN
2	494	398	Friends	NaN
3	435	356	Ninjago	NaN
4	503	329	Key Chain	501.0